Introduction

In this document, we will give you several examples of Bayesian data analysis.

Load packages

library(dplyr)
library(tibble)
library(purrr)
library(tidyr)
library(forcats)
library(gtools)
library(patchwork)
library(broom)
library(broom.mixed)
library(modelr)
library(brms)
library(tidybayes)
library(ggdist)
library(bayesplot)
library(ggplot2)
library(knitr)
BRM_BACKEND <- ifelse(require("cmdstanr"), 'cmdstanr', 'rstan')

Dataset

The following dataset is from experiment 2 of “How Relevant are Incidental Power Poses for HCI?” (Jansen & Hornbæk, 2018). Study participants were asked to either make an expansive posture or a constrictive posture before performing a task. The experiment investigated whether posture could potentially have an effect on risk taking behavior.

First, we load the data.

pose_df = readr::read_csv("data/poses_data.csv", show_col_types = FALSE) %>%
  mutate(condition) %>%
  group_by(participant)
head(pose_df %>% select(participant, condition, change))
participant condition change
1 expansive 3.362832
2 constrictive 29.147982
3 expansive 25.409836
4 constrictive 54.069767
5 expansive -36.644592
6 constrictive 29.756098

Let’s plot out data

wrap_plots(
pose_df %>% 
  ggplot(aes(x = change, fill = condition)) +
    geom_histogram(binwidth = 10) +
    coord_cartesian(xlim = c(-50, 200)) +
    scale_color_theme() + 
    theme_density_x,
pose_df %>% 
  ggplot(aes(x = change)) +
    geom_point(aes(y = 0, color = condition), size = 2) +
    coord_cartesian(xlim = c(-50, 200)) +
    scale_color_theme() + 
    theme_density_x,
nrow = 2)

The data has been aggregated for each participant: - condition = expansive indicates expansive posture, and condition = constrictive indicates constrictive posture - The dependent variable is change which indicates the percentage change in risk-taking behavior. Thus, it is a continuous variable.

For the purposes of this demo, we are only concerned with these two variables. We can ignore the other variables for now.

Intuition of Bayesian Statistics

The Bayesian t-test (BEST) assumes that the data in the two conditions arises from two separate t-distributions. In the following section, we will describe the process for one of the conditions in the experiment.

We will use the \(Normal(\mu = 20, \sigma = 20)\) as the prior distribution.

First, we define some functions for manual calculation of the posterior normal distribution:

sigma_post = function(sigma_prior, sigma, n = 1) {
  sqrt(1 / (1 / (sigma_prior^2) + n / (sigma^2)))
}
mu_post = function(mu_prior, sigma_prior, mu, sigma, n = 1) {
  tau = sigma_post(sigma_prior, sigma, n)
  (tau^2 / sigma_prior^2)*mu_prior + (n * tau^2 / sigma^2)*mu
}
d.p2 = tibble(
  group = c("prior", "expansive", "constrictive", "posterior"), 
  mu = c(0, 32.82, 31.61, mu_post(0, 20, 32.82, 7.52)), 
  sd = c(20, 7.52, 7.06, sigma_post(20, 7.52))
) %>%
  mutate(
    cutoff_group = list(c(1:7)),
    cutoff = list(c(0, 15, 25, 30, 32.82, 40, 100))
  ) %>%
  unnest(c(cutoff_group, cutoff)) %>%
  mutate(
    cutoff = if_else(group == 'prior', 100, cutoff),
    x = map(cutoff, ~seq(from = -100, ., by = 0.1)),
    y = pmap(list(x, mu, sd), ~ dnorm(..1, ..2, ..3))
  )

In the plot below, we show the raw data distribution for the two conditions:

p1 = pose_df %>%
  ggplot() +
  geom_point(aes(x = change, y = condition, colour = condition), 
             position = position_jitter(height = 0.1), alpha = 0.7) +
  scale_color_theme() + 
  labs(y = "Condition") +
  theme(
    legend.position = "none", 
    axis.line.y = element_blank(), 
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank()
  ) +
  scale_x_continuous(limits = c(-150, 250), breaks = seq(-150, 250, by = 50))

p2.blank = tibble(y = c("expansive", "constrictive"), x = 0) %>%
  ggplot(aes(x, y)) +
  scale_color_theme() + 
  theme_density

cowplot::plot_grid(p2.blank, p1, nrow = 2)

First, we define a function to help us plot different cutoff groups.

plot_preliminary <- function(data, group_num, group_name, linewidth_in = 1){
  data %>%
  filter(cutoff_group == group_num & group %in% group_name) %>%
  unnest(c(x, y)) %>%
  ggplot(aes(x, y)) +
  #geom_line(aes(color = group), size = 1) +
  # density
  geom_area(aes(fill = group, color = group, alpha = as.character(group)), position = "identity", linewidth = linewidth_in) +
  scale_x_continuous(limits = c(-150, 250)) +
  scale_y_continuous(limits = c(0, 0.1)) +
  scale_alpha_manual(
    values = c(
      'posterior' = .5,
      'prior' = .3,
      'expansive' = .3,
      'constrictive' = .3
    ),
  ) + 
  scale_color_theme() +
  theme_density +
  theme(legend.position = 'none')
}
cowplot::plot_grid(
  plot_preliminary(d.p2, 0, NULL), 
  p1, nrow = 2)

Next, we plot the prior density:

cowplot::plot_grid(
  plot_preliminary(d.p2, 1, 'prior'), 
  p1, nrow = 2)

Then we describe step by step, how the likelihood is computed:

cowplot::plot_grid(
  plot_preliminary(d.p2, 1, 'expansive'), 
  p1, nrow = 2)

cowplot::plot_grid(
  plot_preliminary(d.p2, 2, 'expansive'), 
  p1, nrow = 2)

cowplot::plot_grid(
  plot_preliminary(d.p2, 3, 'expansive'), 
  p1, nrow = 2)

cowplot::plot_grid(
  plot_preliminary(d.p2, 5, 'expansive'), 
  p1, nrow = 2)

cowplot::plot_grid(
  plot_preliminary(d.p2, 6, 'expansive'), 
  p1, nrow = 2)

cowplot::plot_grid(
  plot_preliminary(d.p2, 7, 'expansive'), 
  p1, nrow = 2)

cowplot::plot_grid(
  plot_preliminary(d.p2, 7, c('prior', 'expansive')), 
  p1, nrow = 2)

We want to compute the posterior, which is the product of the prior and likelihood:

cowplot::plot_grid(
  plot_preliminary(d.p2, 7, c('prior', 'expansive'), linewidth_in = 0), 
  p1, nrow = 2)

cowplot::plot_grid(
  plot_preliminary(d.p2, 7, c('posterior')), 
  p1, nrow = 2)

Of course, if you have gganimate and magick working on your computer, you can try the following code to get a gif! Here, we are not going to show the code.

Model 1: Equal standard deviations

Understanding the data

We plot the data distribution, the empirical density curve (blue line), and the theoretical density curve (black line).

pose_df %>%
  mutate(c = as.factor(condition)) %>%
  ggplot(aes(x = change)) +
  # data distribution
  geom_histogram(
    aes(y = ..density..),
    binwidth = 10,
    fill = theme_yellow,
    alpha = .75,
    color = 'white'
  ) +
  # empirical density curve
  geom_density(size = 1,
               adjust = 1.5,
               color = theme_blue) +
  #  theoretical density curve, a t distribition with mean = 16, sd = 19, and nu = 6
   geom_function(
    color = "#222222",
    linetype = 'dashed',
    fun = function(x)
      dstudent_t(x, mu = 16,  sigma = 39, df = 6),
    size = 1
  ) + 
  scale_x_continuous(limits = c(-200, 200)) +
  theme(
    axis.line.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 20),
    axis.title.x = element_text(size = 24)
  )

Step 1: model specification

Here we use a mathematical expression for the model above.

\[ \begin{align} y_{i} &\sim \mathrm{Student\_t}(\mu, \sigma_{0}, \nu_{0}) \\ \mu &= \beta_{0} + \beta_{1} * x_i \\ \sigma_{0} &\sim \mathrm{HalfNormal}(0, 10) \\ \beta_{0} &\sim \mathrm{?} \\ \beta_{1} &\sim \mathrm{Normal}(0,2) \\ \nu_{0} &\sim \mathrm{?} \\ i & \in \{\mathrm{expansive}, \mathrm{constrictive}\} \end{align} \]

We translate this thought into brms formula using the function bf.

model.1.formula <- bf(
                      # we think change is affected by different conditions.
                      change ~ condition,
                      # to tell brms which response distribution to use
                      family = student()
                    )

Now we can use get_prior() to inspect the available priors and formula. As what we learned, we have priors for

tibble(get_prior(model.1.formula, pose_df))
prior class coef group resp dpar nlpar lb ub source
b default
b conditionexpansive default
student_t(3, 22.6, 38.8) Intercept default
gamma(2, 0.1) nu 1 default
student_t(3, 0, 38.8) sigma 0 default

prior checks

We look at the default priors from brms.

cowplot::plot_grid(
tibble(x = qstudent_t(ppoints(n = 500), df = 3, mu = 22.6, sigma = 38.8)) %>% 
  ggplot() +
  geom_density(aes(x = x), fill = theme_yellow, color = NA) +
  ggtitle('student_t(3, 22.6, 38.8) for Intercept') +
  coord_cartesian(xlim = c(-300, 300),expand = c(0)),
tibble(x = qgamma(ppoints(n = 500), shape = 2, rate = .1)) %>% 
  ggplot() +
  geom_density(aes(x = x), fill = theme_yellow, color = NA) +
  ggtitle('Gamma(2, 0.1) for nu') +
  coord_cartesian(xlim = c(1, 100), expand = 0),
tibble(x = qstudent_t(ppoints(n = 500), df = 3, mu = 0, sigma = 38.8)) %>% 
  ggplot() +
  geom_density(aes(x = x), fill = theme_yellow, color = NA) +
  ggtitle('student_t(3, 0, 38.8) for sigma') +
  coord_cartesian(xlim = c(0, 400),expand = c(0)),
ncol = 3)

We do a prior check for default priors to see the range of prior predictions.

model.1.checks_default <- brm(
  model.1.formula,
  data = pose_df, 
  family = student_t(),
  prior = c(
    # need proper priors for prior predictive checks
    prior(normal(0, 3), class = 'b')
  ),
  # to allow draw from prior distributions
  sample_prior = 'only',
  backend = BRM_BACKEND,
  # save the model
  file = 'rds/model.1.checks_default.rds',
  file_refit = 'on_change' 
)
## Running MCMC with 4 sequential chains...
## 
## Chain 1 Iteration:    1 / 2000 [  0%]  (Warmup) 
## Chain 1 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 1 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 1 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 1 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 1 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 1 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 1 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 1 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 1 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 1 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 1 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 1 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 1 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 1 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 1 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 1 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 1 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 1 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 1 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 1 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 1 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 1 finished in 0.0 seconds.
## Chain 2 Iteration:    1 / 2000 [  0%]  (Warmup) 
## Chain 2 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 2 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 2 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 2 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 2 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 2 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 2 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 2 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 2 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 2 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 2 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 2 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 2 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 2 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 2 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 2 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 2 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 2 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 2 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 2 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 2 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 2 finished in 0.0 seconds.
## Chain 3 Iteration:    1 / 2000 [  0%]  (Warmup) 
## Chain 3 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 3 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 3 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 3 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 3 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 3 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 3 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 3 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 3 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 3 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 3 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 3 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 3 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 3 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 3 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 3 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 3 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 3 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 3 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 3 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 3 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 3 finished in 0.0 seconds.
## Chain 4 Iteration:    1 / 2000 [  0%]  (Warmup) 
## Chain 4 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 4 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 4 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 4 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 4 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 4 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 4 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 4 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 4 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 4 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 4 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 4 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 4 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 4 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 4 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 4 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 4 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 4 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 4 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 4 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 4 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 4 finished in 0.0 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 0.0 seconds.
## Total execution time: 0.5 seconds.

We draw from prior distributions using predicted_draws.

model.1.defaultpriorsamples <- 
  model.1.checks_default %>% 
    predicted_draws(tibble(condition = c('expansive', 'constrictive')))

Take a look at the prior prediction draws. You can ignore .row,.chain, ‘.iteration’. The .draw is the ID for each draw. .prediction is the value we care about.

head(model.1.defaultpriorsamples)
condition .row .chain .iteration .draw .prediction
expansive 1 NA NA 1 117.98567
expansive 1 NA NA 2 -55.82391
expansive 1 NA NA 3 20.33134
expansive 1 NA NA 4 13.47124
expansive 1 NA NA 5 30.67978
expansive 1 NA NA 6 -134.67811

We plot out the prediction draws. They look reasonable but have a wide range. We would expect so given the ranges of the priors.

model.1.defaultpriorsamples %>% 
  ggplot(aes(x = .prediction, group = condition)) +
  geom_density(alpha = .5, color = NA, adjust = 2, fill = theme_yellow) +
  theme_density_x + 
  ggtitle('Checks default priors of model.1')

We want to use narrow priors.

cowplot::plot_grid(
tibble(x = qstudent_t(ppoints(n = 1000), mu = 22.6, sigma = 10, df = 3)) %>% 
  ggplot() +
  geom_density(aes(x = x), fill = theme_yellow, color = NA) +
  ggtitle('t(3, 22.6, 10) for Intercept') +
  coord_cartesian(xlim = c(-30, 80), expand = 0),
tibble(x = qnorm(ppoints(n = 1000), mean = 0, sd = 2)) %>% 
  ggplot() +
  geom_density(aes(x = x), fill = theme_yellow, color = NA) +
  ggtitle('Normal(0, 2) for b') +
  coord_cartesian(xlim = c(-8, 8), expand = 0),
tibble(x = qnorm(ppoints(n = 1000), mean = 0, sd = 10)) %>% 
  ggplot() +
  geom_density(aes(x = x), fill = theme_yellow, color = NA) +
  ggtitle('HalfNormal(0, 10) for sigma') +
  coord_cartesian(xlim = c(0, 50), expand = c(0)),
ncol = 3)

We can do another prior check.

model.1.checks <- brm(
  model.1.formula,
  data = pose_df, 
  family = student_t(),
  prior = c(
    prior(student_t(3, 22.6, 10), class = "Intercept"),
    prior(normal(0, 2), class = 'b'),
    prior(normal(0, 10), class = 'sigma', lb = 0)
  ),
  sample_prior = 'only',
  backend = BRM_BACKEND,
  file = 'rds/model.1.checks.rds',
  file_refit = 'on_change' 
)

We draw from the new prior distribution.

model.1.priorsamples <- 
  model.1.checks %>% 
    predicted_draws(tibble(condition = c('expansive', 'constrictive')))

We compare two sets of prior distributions. Visually, using our priors have a narrower range.

cowplot::plot_grid(
  
model.1.defaultpriorsamples %>% 
  ggplot(aes(x = .prediction, group = condition)) +
  geom_density(alpha = .5, color = NA, adjust = 2, fill = theme_yellow) +
  theme_density_x + 
  # coord_cartesian(xlim = c(-800, 1000)) +
  ggtitle('Checks for default priors of model.1')
,
model.1.priorsamples %>% 
  ggplot(aes(x = .prediction, group = condition)) +
  geom_density(alpha = .5, color = NA, adjust = 2, fill = theme_yellow) +
  theme_density_x + 
  # coord_cartesian(xlim = c(-800, 1000)) +
  ggtitle('Checks for our priors of model.1')
,
ncol = 1)

Step 2: model fitting

Now we can fit the model. This model takes a

model.1 <- brm(
  model.1.formula,
  data = pose_df, 
  family = student_t(),
  prior = c(
    prior(normal(0, 2), class = 'b'),
    prior(normal(0, 10), class = 'sigma', lb = 0)
  ),
  backend = BRM_BACKEND,
  file = 'rds/model.1.rds'
)

Step 3: check posteriors

aspect 1: mcmc traces

First, we check the MCMC traces to ensure that the chains are mixed well.

color_scheme_set("teal")
mcmc_trace(model.1, facet_args = list(ncol = 4))

plot(model.1)

aspect 2: model metrics

We also check Rhat and ESS. We want Rhat to be close to 1 to ensure model convergence. We want Bulk ESS (effective sample size) to be at least a few hundreds (ideally, this should be at least a thousand.) to ensure reliable estimate of mean. We also want Tail ESS to be at this level. If ESSs are too low, it means there are too many correlations in posteriors draws. You need to increase the number of iterations and perhaps the number of chains.

summary(model.1)
##  Family: student 
##   Links: mu = identity; sigma = identity; nu = identity 
## Formula: change ~ condition 
##    Data: pose_df (Number of observations: 80) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Population-Level Effects: 
##                    Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept             22.84      4.49    14.50    31.77 1.00     2686     2092
## conditionexpansive    -0.26      1.97    -4.10     3.62 1.00     2726     2987
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma    28.35      4.13    20.76    36.92 1.00     2180     2520
## nu        3.68      2.35     1.57     9.04 1.00     2172     2158
## 
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

aspect 3: visually

pose_df.bayesiant.y <- pose_df$change

pose_df.bayesiant.yrep <- posterior_predict(model.1, ndraws = 30, seed = 1234)

ppc_dens_overlay(pose_df.bayesiant.y, pose_df.bayesiant.yrep, linewidth = 2)

Now we check the posterior prediction of this model to ensure that it generates reasonable predictions.

model.1.predictions <- 
  model.1 %>% 
    predicted_draws(tibble(condition = c('expansive', 'constrictive')))
head(model.1.predictions)
condition .row .chain .iteration .draw .prediction
expansive 1 NA NA 1 -63.38436
expansive 1 NA NA 2 126.77808
expansive 1 NA NA 3 51.27468
expansive 1 NA NA 4 28.01202
expansive 1 NA NA 5 13.67722
expansive 1 NA NA 6 13.10874
plot_predictions <- function(model, df = NULL, title = ''){
  
  if(is.null(df))
      df = tibble(condition = c('expansive', 'constrictive'), 
                           participant = c(-1, -1))
  model %>% 
    predicted_draws(df,
                    seed = 1234,
                    ndraws = NULL,
                    allow_new_levels = TRUE,
                    sample_new_levels = 'uncertainty') %>% 
    ggplot(aes(x = .prediction, colour = condition), fill = NA) +
    geom_density(alpha = .5, size = 1, adjust = 2) +
    scale_color_theme() +
    theme_density_x + 
    scale_y_continuous(breaks = 0, labels = 'constrictive') + 
    scale_x_continuous(breaks = seq(-150, 250, by = 50)) +
    coord_cartesian(xlim = c(-150, 250)) + 
    ggtitle(paste0('Posterior predictions of ', deparse(substitute(model))))
}
cowplot::plot_grid(plot_predictions(model.1), 
                   p1 + 
                    scale_x_continuous(breaks = seq(-150, 250, by = 100)) +
                    coord_cartesian(xlim = c(-150, 250)), 
                   nrow = 2)

We now generate the posterior predictions of means, which are of interests here.

model.1.posteriors <- 
  model.1 %>% 
    epred_draws(tibble(condition = c('expansive', 'constrictive')))
plot_posteriors <- function(model, df = NULL, title = ''){
  
  if(is.null(df))
     df = tibble(condition = c('expansive', 'constrictive'))
  
  model %>% 
    epred_draws(df, 
                # ignoring random effects if there is any
                #seed = 1234,
                ndraws = NULL,
                re_formula = NA) %>% 
    ggplot(aes(x = .epred, fill = condition)) +
    geom_density(alpha = .5, size = 1, adjust = 2, color = NA) +
    scale_color_theme() +
    theme_density_x + 
    scale_y_continuous(breaks = 0, labels = 'constrictive') + 
    scale_x_continuous(limits = c(-150, 250), breaks = seq(-150, 250, by = 50)) +
    ggtitle(paste0('Posterior means of ', deparse(substitute(model))))
}

cowplot::plot_grid(plot_posteriors(model.1), p1, nrow = 2)

Model 2: the BEST test model

Step 1: model specification

This model is the BEST test model as described by Kruschke in the paper Bayesian estimation supersedes the t-test. In this model, \(\beta\) indicates the mean difference in the outcome variable between the two groups (in this case, the percent change in the BART scores). We fit different priors on \(\beta\) and set different weights on these priors to obtain our posterior estimate.

\[ \begin{align} y_{i} &\sim \mathrm{T}(\nu, \mu, \sigma) \\ \mu &= \beta_{0} + \beta_{1} * x_i \\ \sigma &= \sigma_{a} + \sigma_{b}*x_i \\ \beta_{1} &\sim \mathrm{Normal}(\mu_{0}, \sigma_{0}) \\ \sigma_a, \sigma_b &\sim \mathrm{Cauchy}(0, 2) \\ \nu &\sim \mathrm{exp}(1/30)\\ i & \in \{\mathrm{expansive}, \mathrm{constrictive}\} \end{align} \]

model.2.formula <- bf(# we think change is affected by different conditions.
                      change ~ condition,
                      sigma ~ condition,
                      # to tell brms which response distribution to use
                      family = student())
tibble(get_prior(model.2.formula, pose_df))
prior class coef group resp dpar nlpar lb ub source
b default
b conditionexpansive default
student_t(3, 22.6, 38.8) Intercept default
gamma(2, 0.1) nu 1 default
b sigma default
b conditionexpansive sigma default
student_t(3, 0, 2.5) Intercept sigma default

prior check

cowplot::plot_grid(
  tibble(x = qnorm(ppoints(n = 1000),  mean = 0, sd = 2)) %>% 
  ggplot() +
  geom_density(aes(x = x), fill = theme_yellow, color = NA) +
  ggtitle('Normal(0 ,2) for Intercept') +
  coord_cartesian(expand = c(0)),
tibble(x = qexp(ppoints(n = 1000), rate = 0.0333)) %>% 
  ggplot() +
  geom_density(aes(x = x), fill = theme_yellow, color = NA) +
  ggtitle('exponential(0.0333) for nu') +
  coord_cartesian(expand = 0),
tibble(x = qcauchy(ppoints(n = 1000), location = 0, scale = 2)) %>% 
  ggplot() +
  geom_density(aes(x = x), fill = theme_yellow, color = NA) +
  ggtitle('cauchy(0, 2) for sigma') +
  coord_cartesian(expand = c(0)),
ncol = 3)

model.2.priorchecks <- brm(
  model.2.formula,
  data = pose_df, 
  family = student_t(),
  prior = c(
    prior(normal(0, 2), class = 'b'),
    prior(cauchy(0, 2), class = 'b', dpar = 'sigma'),
    prior(exponential(0.0333), class = 'nu')
  ),
  sample_prior = 'only',
  backend = BRM_BACKEND,
  file = 'rds/model.2.priorchecks.rds',
  file_refit = 'on_change' 
)
## Running MCMC with 4 sequential chains...
## 
## Chain 1 Iteration:    1 / 2000 [  0%]  (Warmup) 
## Chain 1 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 1 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 1 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 1 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 1 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 1 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 1 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 1 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 1 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 1 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 1 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 1 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 1 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 1 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 1 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 1 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 1 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 1 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 1 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 1 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 1 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 1 finished in 0.1 seconds.
## Chain 2 Iteration:    1 / 2000 [  0%]  (Warmup) 
## Chain 2 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 2 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 2 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 2 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 2 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 2 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 2 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 2 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 2 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 2 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 2 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 2 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 2 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 2 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 2 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 2 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 2 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 2 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 2 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 2 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 2 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 2 finished in 0.1 seconds.
## Chain 3 Iteration:    1 / 2000 [  0%]  (Warmup) 
## Chain 3 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 3 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 3 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 3 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 3 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 3 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 3 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 3 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 3 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 3 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 3 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 3 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 3 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 3 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 3 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 3 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 3 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 3 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 3 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 3 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 3 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 3 finished in 0.3 seconds.
## Chain 4 Iteration:    1 / 2000 [  0%]  (Warmup) 
## Chain 4 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 4 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 4 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 4 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 4 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 4 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 4 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 4 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 4 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 4 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 4 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 4 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 4 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 4 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 4 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 4 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 4 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 4 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 4 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 4 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 4 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 4 finished in 0.1 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 0.2 seconds.
## Total execution time: 0.9 seconds.
model.2.priorchecks %>% 
 epred_draws(tibble(condition = c('expansive', 'constrictive'))) %>% 
  ggplot(aes(x = .epred, group = condition)) +
  geom_density(alpha = .5, color = NA, adjust = 2, fill = theme_yellow) +
  theme_density_x + 
  ggtitle('Checks priors of model.2')

Step 2: model fiting

model.2 <- brm(
  model.2.formula,
  data = pose_df, 
  family = student_t(),
  prior = c(
    prior(normal(0, 2), class = 'b'),
    prior(cauchy(0, 2), class = 'b', dpar = 'sigma'),
    prior(exponential(0.0333), class = 'nu')
  ),
  backend = BRM_BACKEND,
  file = 'rds/model.2.rds',
  file_refit = 'on_change' 
)
## Running MCMC with 4 sequential chains...
## 
## Chain 1 Iteration:    1 / 2000 [  0%]  (Warmup) 
## Chain 1 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 1 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 1 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 1 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 1 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 1 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 1 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 1 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 1 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 1 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 1 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 1 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 1 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 1 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 1 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 1 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 1 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 1 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 1 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 1 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 1 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 1 finished in 0.1 seconds.
## Chain 2 Iteration:    1 / 2000 [  0%]  (Warmup) 
## Chain 2 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 2 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 2 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 2 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 2 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 2 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 2 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 2 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 2 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 2 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 2 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 2 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 2 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 2 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 2 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 2 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 2 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 2 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 2 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 2 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 2 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 2 finished in 0.2 seconds.
## Chain 3 Iteration:    1 / 2000 [  0%]  (Warmup) 
## Chain 3 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 3 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 3 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 3 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 3 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 3 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 3 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 3 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 3 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 3 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 3 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 3 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 3 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 3 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 3 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 3 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 3 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 3 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 3 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 3 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 3 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 3 finished in 0.2 seconds.
## Chain 4 Iteration:    1 / 2000 [  0%]  (Warmup) 
## Chain 4 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 4 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 4 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 4 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 4 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 4 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 4 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 4 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 4 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 4 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 4 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 4 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 4 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 4 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 4 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 4 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 4 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 4 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 4 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 4 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 4 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 4 finished in 0.1 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 0.1 seconds.
## Total execution time: 0.9 seconds.

Step 3: posterior checks

summary(model.2)
##  Family: student 
##   Links: mu = identity; sigma = log; nu = identity 
## Formula: change ~ condition 
##          sigma ~ condition
##    Data: pose_df (Number of observations: 80) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Population-Level Effects: 
##                          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                   24.52      4.88    15.67    34.84 1.00     2333
## sigma_Intercept              3.41      0.20     3.00     3.79 1.00     2215
## conditionexpansive          -0.09      1.96    -3.93     3.64 1.00     3639
## sigma_conditionexpansive     0.15      0.21    -0.27     0.57 1.00     3051
##                          Tail_ESS
## Intercept                    2276
## sigma_Intercept              2420
## conditionexpansive           2377
## sigma_conditionexpansive     2668
## 
## Family Specific Parameters: 
##    Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## nu     6.03      8.24     1.76    24.62 1.00     1860     1782
## 
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
wrap_plots(plot_predictions(model.2) + scale_x_continuous(limits = c(-200,250), expand = c(0,0), breaks = seq(-150, 250, by = 50)) 
                   , 
                   p1 + scale_x_continuous(limits = c(-200,250), expand = c(0,0), breaks = seq(-150, 250, by = 50)) , 
                   nrow = 2)

wrap_plots(
  plot_posteriors(model.2), 
  p1,
nrow = 2)

model.2.posteriors <- model.2 %>% 
                epred_draws(tibble(condition = c('expansive', 'constrictive')), 
                #seed = 1234,
                ndraws = NULL,
                re_formula = NA) 

Model 3: the BEST model with better intercepts

Step 1: model specification

\[ \begin{align} y_{i} &\sim \mathrm{T}(\nu, \mu, \sigma) \\ \mu &= \beta_{i,j} + \beta_{1} * x_i \\ \sigma &= \sigma_{a} + \sigma_{b}*x_i \\ \beta_{1} &\sim \mathrm{Normal}(0, 2) \\ \sigma_a, \sigma_b &\sim \mathrm{HalfNormal}(0, 10) \\ \nu &\sim \mathrm{exp}(1/30)\\ i & \in \{\mathrm{expansive}, \mathrm{constrictive}\}\\ j & \in \{1, ..., \mathrm{N}\} \end{align} \]

model.3.formula <- bf(# we think change is affected by different conditions.
                      change ~ condition + (1|participant),
                      sigma ~ condition,
                      # to tell brms which response distribution to use
                      family = student())
tibble(get_prior(model.3.formula, pose_df))
prior class coef group resp dpar nlpar lb ub source
b default
b conditionexpansive default
student_t(3, 22.6, 38.8) Intercept default
gamma(2, 0.1) nu 1 default
student_t(3, 0, 38.8) sd 0 default
sd participant default
sd Intercept participant default
b sigma default
b conditionexpansive sigma default
student_t(3, 0, 2.5) Intercept sigma default

Step 2: model fiting

model.3 <- brm(
  model.3.formula,
  data = pose_df, 
  family = student_t(),
  prior = c(
    prior(normal(0, 2), class = 'b'),
    prior(normal(0, 2), class = 'b', dpar = 'sigma'),
    prior(normal(0, 2), class = 'sd', group = 'participant', lb = 0),
    prior(exponential(0.0333), class = 'nu')
  ),
  #backend = BRM_BACKEND,
  file = 'rds/model.3.rds',
  file_refit = 'on_change' 
)
## 
## SAMPLING FOR MODEL '098ecfbfafd8b9d1ae94bd6b9b0d7ae1' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 4.1e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.41 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.211699 seconds (Warm-up)
## Chain 1:                0.182771 seconds (Sampling)
## Chain 1:                0.39447 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL '098ecfbfafd8b9d1ae94bd6b9b0d7ae1' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 1.3e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.13 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.218319 seconds (Warm-up)
## Chain 2:                0.185932 seconds (Sampling)
## Chain 2:                0.404251 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL '098ecfbfafd8b9d1ae94bd6b9b0d7ae1' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 1.3e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.13 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.200957 seconds (Warm-up)
## Chain 3:                0.18314 seconds (Sampling)
## Chain 3:                0.384097 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL '098ecfbfafd8b9d1ae94bd6b9b0d7ae1' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 1.6e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.16 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 0.210974 seconds (Warm-up)
## Chain 4:                0.179407 seconds (Sampling)
## Chain 4:                0.390381 seconds (Total)
## Chain 4:

Step 3: posterior checks

summary(model.3)
##  Family: student 
##   Links: mu = identity; sigma = log; nu = identity 
## Formula: change ~ condition + (1 | participant) 
##          sigma ~ condition
##    Data: pose_df (Number of observations: 80) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~participant (Number of levels: 80) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.63      1.23     0.06     4.57 1.00     3650     2196
## 
## Population-Level Effects: 
##                          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                   24.48      4.77    15.62    34.43 1.00     5853
## sigma_Intercept              3.40      0.20     3.01     3.79 1.00     5493
## conditionexpansive          -0.09      1.92    -3.77     3.73 1.00    12370
## sigma_conditionexpansive     0.15      0.21    -0.29     0.58 1.00     9195
##                          Tail_ESS
## Intercept                    2933
## sigma_Intercept              2867
## conditionexpansive           2502
## sigma_conditionexpansive     2684
## 
## Family Specific Parameters: 
##    Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## nu     6.09      8.76     1.78    24.32 1.00     4880     2600
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
cowplot::plot_grid(plot_predictions(model.3), p1, nrow = 2)

cowplot::plot_grid(plot_posteriors(model.3), p1, nrow = 2)

model.3.posteriors <-
  model.3 %>% 
  epred_draws(tibble(condition = c('expansive', 'constrictive')),
               re_formula = NA)
wrap_plots(
model.3.posteriors %>%
  mutate(model = 'model 3') %>%
  rbind(
    model.2.posteriors %>% 
      mutate(model = 'model 2'))  %>% 
  rbind(
    model.1.posteriors %>% 
      mutate(model = 'model 1')) %>% 
ggplot() + 
  geom_density(aes(x = .epred, fill = condition), adjust = 1.5, color = NA, alpha = .5) +
  facet_grid(model ~ .) +
  scale_x_continuous(limits = c(-150, 250), breaks = seq(-150, 250, by = 50)) +
  scale_color_theme() +
  theme_density_x +
  ggtitle('Compare the means of all three models'),
p1, nrow = 2, heights = c(4,1.5))

Model 4: Negative-Binomial Regression model

Understanding the data

pose_raw_df = read.csv("data/posture_data-raw.csv") %>%
  mutate(participant = factor(participant)) %>%
  rename(trial = trial.number)

head(pose_raw_df)
participant condition total.money trial trial.money exploded pumps life
1 expansive 910 0 62 0 62 72
1 expansive 910 1 0 1 27 27
1 expansive 910 2 47 0 47 98
1 expansive 910 3 0 1 86 86
1 expansive 910 4 60 0 60 104
1 expansive 910 5 0 1 26 26
tibble(x = seq(1, 160, by = 1)) %>%
  ggplot(aes(x)) +
  geom_function(
    color = theme_blue,
    linetype = 'dashed',
    fun = function(x) # need to fix
      dnbinom(round(x), size = 5, mu = 64),
    size = 1
  ) +
  # stat_function(geom = "area", fill = theme_red, fun = function(x) dnbinom(round(x), size = 5, mu = 64), xlim = c(128, 160), alpha = 0.5) +
  geom_vline(xintercept = 0, color = "red", size = 1, alpha = 0.5) +
  geom_vline(xintercept = 128, color = "red", size = 1, alpha = 0.5) +
  coord_cartesian(xlim = c(0, 160)) +
  theme(
    axis.line.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 16),
    axis.title.x = element_blank()
  )

pose_raw_df %>%
  mutate(c = as.factor(condition)) %>%
  ggplot(aes(x = pumps)) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 2,
    fill = theme_yellow,
    alpha = .5,
    color = 'white'
  ) +
  geom_density(size = 1,
               adjust = 3,
               color = theme_blue) +
  geom_function(
    color = "#222222",
    linetype = 'dashed',
    fun = function(x) # need to fix
      dnbinom(round(x), size = 3, mu = 40), # this does not use the log-link but brm does so the prior below is # log(387)
    size = 1
  ) +
  theme(
    axis.line.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank()
  )

Step 1: model specification

\[ \begin{align} y_{i} &\sim \mathrm{Neg-Binomial}(\mu, \phi) \\ log(\mu) &= \beta_{0} + \beta_{1} . x_i \\ \beta_{0} &\sim \mathrm{Normal}(4.1, 0.5) \\ \beta_{0, j} &\sim \mathrm{Student\_t}(3, 0, 1) \\ \beta_{1} &\sim \mathrm{Normal}(0, 0.5) \\ \phi &\sim \mathrm{Gamma}(5, 1) \\ i & \in \{\mathrm{expansive}, \mathrm{constrictive}\}\\ \end{align} \]

Step 2: prior predictive checks

model.4.prior_pred = brm(pumps ~ 1 + condition,
                      data = pose_raw_df, family = negbinomial(),
                      prior = c(prior(normal(4.1, 0.5), class = Intercept),
                                prior(normal(0, 0.5), class = b),
                                prior(gamma(5, 1), class = shape)
                      ),
                      backend = BRM_BACKEND,
                      file = 'rds/model.4.prior_predictive.rds',
                      file_refit = 'on_change' ,
                      sample_prior = "only",
                      iter = 4000, warmup = 1000, cores = 4, chains = 4)
model.4.prior_pred.samples <- model.4.prior_pred %>% 
    predicted_draws(tibble(condition = c('expansive', 'constrictive')), re_formula = NA)

head(model.4.prior_pred.samples)
condition .row .chain .iteration .draw .prediction
expansive 1 NA NA 1 42
expansive 1 NA NA 2 74
expansive 1 NA NA 3 9
expansive 1 NA NA 4 37
expansive 1 NA NA 5 62
expansive 1 NA NA 6 16
ggplot() +
geom_histogram(
  data = pose_raw_df,
  mapping = aes(x = pumps, y = ..density..),
  binwidth = 10,
  alpha = 0.5,
  fill = theme_yellow,
  color = 'white'
) +
geom_density(model.4.prior_pred.samples,
               mapping = aes(x = .prediction, y = ..density..), adjust = 1, stroke = theme_blue, size = 1) +
theme_density_x + 
coord_cartesian(xlim = c(0, 150)) +
theme(
  axis.text.x = element_text(size = 16)
)

Step 3: model fitting

model.4 = brm(pumps ~ 1 + condition,
                      data = pose_raw_df, family = negbinomial(),
                      prior = c(prior(normal(4.1, 0.5), class = Intercept),
                                prior(normal(0, 0.5), class = b),
                                prior(gamma(5, 1), class = shape)
                      ),
                      backend = BRM_BACKEND,
                      file = 'rds/model.4.rds',
                      file_refit = 'on_change' ,
                      iter = 4000, warmup = 1000, cores = 4, chains = 4)
## Running MCMC with 4 parallel chains...
## 
## Chain 1 Iteration:    1 / 4000 [  0%]  (Warmup) 
## Chain 1 Iteration:  100 / 4000 [  2%]  (Warmup) 
## Chain 2 Iteration:    1 / 4000 [  0%]  (Warmup) 
## Chain 2 Iteration:  100 / 4000 [  2%]  (Warmup) 
## Chain 3 Iteration:    1 / 4000 [  0%]  (Warmup) 
## Chain 3 Iteration:  100 / 4000 [  2%]  (Warmup) 
## Chain 4 Iteration:    1 / 4000 [  0%]  (Warmup) 
## Chain 4 Iteration:  100 / 4000 [  2%]  (Warmup) 
## Chain 1 Iteration:  200 / 4000 [  5%]  (Warmup) 
## Chain 1 Iteration:  300 / 4000 [  7%]  (Warmup) 
## Chain 2 Iteration:  200 / 4000 [  5%]  (Warmup) 
## Chain 2 Iteration:  300 / 4000 [  7%]  (Warmup) 
## Chain 3 Iteration:  200 / 4000 [  5%]  (Warmup) 
## Chain 3 Iteration:  300 / 4000 [  7%]  (Warmup) 
## Chain 4 Iteration:  200 / 4000 [  5%]  (Warmup) 
## Chain 4 Iteration:  300 / 4000 [  7%]  (Warmup) 
## Chain 1 Iteration:  400 / 4000 [ 10%]  (Warmup) 
## Chain 1 Iteration:  500 / 4000 [ 12%]  (Warmup) 
## Chain 2 Iteration:  400 / 4000 [ 10%]  (Warmup) 
## Chain 2 Iteration:  500 / 4000 [ 12%]  (Warmup) 
## Chain 3 Iteration:  400 / 4000 [ 10%]  (Warmup) 
## Chain 3 Iteration:  500 / 4000 [ 12%]  (Warmup) 
## Chain 4 Iteration:  400 / 4000 [ 10%]  (Warmup) 
## Chain 4 Iteration:  500 / 4000 [ 12%]  (Warmup) 
## Chain 1 Iteration:  600 / 4000 [ 15%]  (Warmup) 
## Chain 1 Iteration:  700 / 4000 [ 17%]  (Warmup) 
## Chain 1 Iteration:  800 / 4000 [ 20%]  (Warmup) 
## Chain 2 Iteration:  600 / 4000 [ 15%]  (Warmup) 
## Chain 2 Iteration:  700 / 4000 [ 17%]  (Warmup) 
## Chain 3 Iteration:  600 / 4000 [ 15%]  (Warmup) 
## Chain 3 Iteration:  700 / 4000 [ 17%]  (Warmup) 
## Chain 3 Iteration:  800 / 4000 [ 20%]  (Warmup) 
## Chain 4 Iteration:  600 / 4000 [ 15%]  (Warmup) 
## Chain 4 Iteration:  700 / 4000 [ 17%]  (Warmup) 
## Chain 1 Iteration:  900 / 4000 [ 22%]  (Warmup) 
## Chain 1 Iteration: 1000 / 4000 [ 25%]  (Warmup) 
## Chain 1 Iteration: 1001 / 4000 [ 25%]  (Sampling) 
## Chain 2 Iteration:  800 / 4000 [ 20%]  (Warmup) 
## Chain 2 Iteration:  900 / 4000 [ 22%]  (Warmup) 
## Chain 2 Iteration: 1000 / 4000 [ 25%]  (Warmup) 
## Chain 2 Iteration: 1001 / 4000 [ 25%]  (Sampling) 
## Chain 3 Iteration:  900 / 4000 [ 22%]  (Warmup) 
## Chain 3 Iteration: 1000 / 4000 [ 25%]  (Warmup) 
## Chain 3 Iteration: 1001 / 4000 [ 25%]  (Sampling) 
## Chain 4 Iteration:  800 / 4000 [ 20%]  (Warmup) 
## Chain 4 Iteration:  900 / 4000 [ 22%]  (Warmup) 
## Chain 1 Iteration: 1100 / 4000 [ 27%]  (Sampling) 
## Chain 1 Iteration: 1200 / 4000 [ 30%]  (Sampling) 
## Chain 2 Iteration: 1100 / 4000 [ 27%]  (Sampling) 
## Chain 2 Iteration: 1200 / 4000 [ 30%]  (Sampling) 
## Chain 3 Iteration: 1100 / 4000 [ 27%]  (Sampling) 
## Chain 3 Iteration: 1200 / 4000 [ 30%]  (Sampling) 
## Chain 4 Iteration: 1000 / 4000 [ 25%]  (Warmup) 
## Chain 4 Iteration: 1001 / 4000 [ 25%]  (Sampling) 
## Chain 4 Iteration: 1100 / 4000 [ 27%]  (Sampling) 
## Chain 1 Iteration: 1300 / 4000 [ 32%]  (Sampling) 
## Chain 1 Iteration: 1400 / 4000 [ 35%]  (Sampling) 
## Chain 2 Iteration: 1300 / 4000 [ 32%]  (Sampling) 
## Chain 3 Iteration: 1300 / 4000 [ 32%]  (Sampling) 
## Chain 3 Iteration: 1400 / 4000 [ 35%]  (Sampling) 
## Chain 4 Iteration: 1200 / 4000 [ 30%]  (Sampling) 
## Chain 4 Iteration: 1300 / 4000 [ 32%]  (Sampling) 
## Chain 1 Iteration: 1500 / 4000 [ 37%]  (Sampling) 
## Chain 1 Iteration: 1600 / 4000 [ 40%]  (Sampling) 
## Chain 2 Iteration: 1400 / 4000 [ 35%]  (Sampling) 
## Chain 2 Iteration: 1500 / 4000 [ 37%]  (Sampling) 
## Chain 3 Iteration: 1500 / 4000 [ 37%]  (Sampling) 
## Chain 3 Iteration: 1600 / 4000 [ 40%]  (Sampling) 
## Chain 4 Iteration: 1400 / 4000 [ 35%]  (Sampling) 
## Chain 4 Iteration: 1500 / 4000 [ 37%]  (Sampling) 
## Chain 1 Iteration: 1700 / 4000 [ 42%]  (Sampling) 
## Chain 1 Iteration: 1800 / 4000 [ 45%]  (Sampling) 
## Chain 2 Iteration: 1600 / 4000 [ 40%]  (Sampling) 
## Chain 2 Iteration: 1700 / 4000 [ 42%]  (Sampling) 
## Chain 3 Iteration: 1700 / 4000 [ 42%]  (Sampling) 
## Chain 3 Iteration: 1800 / 4000 [ 45%]  (Sampling) 
## Chain 4 Iteration: 1600 / 4000 [ 40%]  (Sampling) 
## Chain 4 Iteration: 1700 / 4000 [ 42%]  (Sampling) 
## Chain 1 Iteration: 1900 / 4000 [ 47%]  (Sampling) 
## Chain 1 Iteration: 2000 / 4000 [ 50%]  (Sampling) 
## Chain 2 Iteration: 1800 / 4000 [ 45%]  (Sampling) 
## Chain 2 Iteration: 1900 / 4000 [ 47%]  (Sampling) 
## Chain 3 Iteration: 1900 / 4000 [ 47%]  (Sampling) 
## Chain 3 Iteration: 2000 / 4000 [ 50%]  (Sampling) 
## Chain 4 Iteration: 1800 / 4000 [ 45%]  (Sampling) 
## Chain 4 Iteration: 1900 / 4000 [ 47%]  (Sampling) 
## Chain 1 Iteration: 2100 / 4000 [ 52%]  (Sampling) 
## Chain 1 Iteration: 2200 / 4000 [ 55%]  (Sampling) 
## Chain 2 Iteration: 2000 / 4000 [ 50%]  (Sampling) 
## Chain 2 Iteration: 2100 / 4000 [ 52%]  (Sampling) 
## Chain 3 Iteration: 2100 / 4000 [ 52%]  (Sampling) 
## Chain 3 Iteration: 2200 / 4000 [ 55%]  (Sampling) 
## Chain 4 Iteration: 2000 / 4000 [ 50%]  (Sampling) 
## Chain 4 Iteration: 2100 / 4000 [ 52%]  (Sampling) 
## Chain 1 Iteration: 2300 / 4000 [ 57%]  (Sampling) 
## Chain 1 Iteration: 2400 / 4000 [ 60%]  (Sampling) 
## Chain 2 Iteration: 2200 / 4000 [ 55%]  (Sampling) 
## Chain 2 Iteration: 2300 / 4000 [ 57%]  (Sampling) 
## Chain 3 Iteration: 2300 / 4000 [ 57%]  (Sampling) 
## Chain 3 Iteration: 2400 / 4000 [ 60%]  (Sampling) 
## Chain 4 Iteration: 2200 / 4000 [ 55%]  (Sampling) 
## Chain 4 Iteration: 2300 / 4000 [ 57%]  (Sampling) 
## Chain 1 Iteration: 2500 / 4000 [ 62%]  (Sampling) 
## Chain 1 Iteration: 2600 / 4000 [ 65%]  (Sampling) 
## Chain 2 Iteration: 2400 / 4000 [ 60%]  (Sampling) 
## Chain 2 Iteration: 2500 / 4000 [ 62%]  (Sampling) 
## Chain 3 Iteration: 2500 / 4000 [ 62%]  (Sampling) 
## Chain 3 Iteration: 2600 / 4000 [ 65%]  (Sampling) 
## Chain 4 Iteration: 2400 / 4000 [ 60%]  (Sampling) 
## Chain 4 Iteration: 2500 / 4000 [ 62%]  (Sampling) 
## Chain 1 Iteration: 2700 / 4000 [ 67%]  (Sampling) 
## Chain 2 Iteration: 2600 / 4000 [ 65%]  (Sampling) 
## Chain 2 Iteration: 2700 / 4000 [ 67%]  (Sampling) 
## Chain 3 Iteration: 2700 / 4000 [ 67%]  (Sampling) 
## Chain 3 Iteration: 2800 / 4000 [ 70%]  (Sampling) 
## Chain 4 Iteration: 2600 / 4000 [ 65%]  (Sampling) 
## Chain 4 Iteration: 2700 / 4000 [ 67%]  (Sampling) 
## Chain 1 Iteration: 2800 / 4000 [ 70%]  (Sampling) 
## Chain 1 Iteration: 2900 / 4000 [ 72%]  (Sampling) 
## Chain 2 Iteration: 2800 / 4000 [ 70%]  (Sampling) 
## Chain 3 Iteration: 2900 / 4000 [ 72%]  (Sampling) 
## Chain 3 Iteration: 3000 / 4000 [ 75%]  (Sampling) 
## Chain 4 Iteration: 2800 / 4000 [ 70%]  (Sampling) 
## Chain 4 Iteration: 2900 / 4000 [ 72%]  (Sampling) 
## Chain 1 Iteration: 3000 / 4000 [ 75%]  (Sampling) 
## Chain 1 Iteration: 3100 / 4000 [ 77%]  (Sampling) 
## Chain 2 Iteration: 2900 / 4000 [ 72%]  (Sampling) 
## Chain 2 Iteration: 3000 / 4000 [ 75%]  (Sampling) 
## Chain 3 Iteration: 3100 / 4000 [ 77%]  (Sampling) 
## Chain 3 Iteration: 3200 / 4000 [ 80%]  (Sampling) 
## Chain 3 Iteration: 3300 / 4000 [ 82%]  (Sampling) 
## Chain 4 Iteration: 3000 / 4000 [ 75%]  (Sampling) 
## Chain 4 Iteration: 3100 / 4000 [ 77%]  (Sampling) 
## Chain 1 Iteration: 3200 / 4000 [ 80%]  (Sampling) 
## Chain 1 Iteration: 3300 / 4000 [ 82%]  (Sampling) 
## Chain 2 Iteration: 3100 / 4000 [ 77%]  (Sampling) 
## Chain 2 Iteration: 3200 / 4000 [ 80%]  (Sampling) 
## Chain 3 Iteration: 3400 / 4000 [ 85%]  (Sampling) 
## Chain 3 Iteration: 3500 / 4000 [ 87%]  (Sampling) 
## Chain 4 Iteration: 3200 / 4000 [ 80%]  (Sampling) 
## Chain 4 Iteration: 3300 / 4000 [ 82%]  (Sampling) 
## Chain 1 Iteration: 3400 / 4000 [ 85%]  (Sampling) 
## Chain 1 Iteration: 3500 / 4000 [ 87%]  (Sampling) 
## Chain 2 Iteration: 3300 / 4000 [ 82%]  (Sampling) 
## Chain 2 Iteration: 3400 / 4000 [ 85%]  (Sampling) 
## Chain 3 Iteration: 3600 / 4000 [ 90%]  (Sampling) 
## Chain 3 Iteration: 3700 / 4000 [ 92%]  (Sampling) 
## Chain 4 Iteration: 3400 / 4000 [ 85%]  (Sampling) 
## Chain 1 Iteration: 3600 / 4000 [ 90%]  (Sampling) 
## Chain 1 Iteration: 3700 / 4000 [ 92%]  (Sampling) 
## Chain 2 Iteration: 3500 / 4000 [ 87%]  (Sampling) 
## Chain 2 Iteration: 3600 / 4000 [ 90%]  (Sampling) 
## Chain 3 Iteration: 3800 / 4000 [ 95%]  (Sampling) 
## Chain 3 Iteration: 3900 / 4000 [ 97%]  (Sampling) 
## Chain 4 Iteration: 3500 / 4000 [ 87%]  (Sampling) 
## Chain 4 Iteration: 3600 / 4000 [ 90%]  (Sampling) 
## Chain 1 Iteration: 3800 / 4000 [ 95%]  (Sampling) 
## Chain 1 Iteration: 3900 / 4000 [ 97%]  (Sampling) 
## Chain 2 Iteration: 3700 / 4000 [ 92%]  (Sampling) 
## Chain 2 Iteration: 3800 / 4000 [ 95%]  (Sampling) 
## Chain 3 Iteration: 4000 / 4000 [100%]  (Sampling) 
## Chain 4 Iteration: 3700 / 4000 [ 92%]  (Sampling) 
## Chain 4 Iteration: 3800 / 4000 [ 95%]  (Sampling) 
## Chain 3 finished in 2.2 seconds.
## Chain 1 Iteration: 4000 / 4000 [100%]  (Sampling) 
## Chain 2 Iteration: 3900 / 4000 [ 97%]  (Sampling) 
## Chain 2 Iteration: 4000 / 4000 [100%]  (Sampling) 
## Chain 4 Iteration: 3900 / 4000 [ 97%]  (Sampling) 
## Chain 4 Iteration: 4000 / 4000 [100%]  (Sampling) 
## Chain 1 finished in 2.3 seconds.
## Chain 2 finished in 2.4 seconds.
## Chain 4 finished in 2.3 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 2.3 seconds.
## Total execution time: 2.4 seconds.
summary(model.4)
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: pumps ~ 1 + condition 
##    Data: pose_raw_df (Number of observations: 2400) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Population-Level Effects: 
##                    Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept              3.64      0.01     3.61     3.66 1.00    11222     8821
## conditionexpansive    -0.01      0.02    -0.05     0.03 1.00    11532     9305
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape     4.59      0.15     4.31     4.89 1.00    11929     8873
## 
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pose_raw_df.bayesian_poisson.y <- pose_raw_df$pumps

pose_raw_df.bayesian_poisson.yrep <- posterior_predict(model.4, ndraws = 30, seed = 1234)

ppc_dens_overlay(y = pose_raw_df.bayesian_poisson.y,
                 yrep = pose_raw_df.bayesian_poisson.yrep)

Step 4: model interpretation

The results of this model are on the log-odds scale. What do the coefficients mean? The simplest way is to simply transform the data into a more interpretable scale and visualise the results:

p.model.4 = pose_raw_df %>%
  ggplot() +
  geom_point(aes(x = pumps, y = condition, colour = condition), 
             position = position_jitter(height = 0.1), alpha = 0.7) +
  scale_color_theme() + 
  labs(y = "Condition") +
  theme(
    legend.position = "none", 
    axis.line.y = element_blank(), 
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank()
  ) +
  scale_x_continuous(limits = c(0, 130), breaks = seq(0, 150, by = 30))

draws.model.4 = plot_predictions(model.4, df = crossing(condition = c('expansive', 'constrictive'), 
                                          trial = 0:29,
                                          participant = 0)) + 
    scale_x_continuous(breaks = seq(0, 150, by = 30)) +
    coord_cartesian(xlim = c(0, 150))

wrap_plots(
  draws.model.4, 
  p.model.4,
nrow = 2)

We generate the average of 30 trials using an average participant.

model.4.posteriors <-  model.4 %>% 
    epred_draws(crossing(condition = c('expansive', 'constrictive'),     
                       trial = 0:29),
                re_formula = NA,
                allow_new_levels  = FALSE) %>% 
  mutate(model = 'model 4')  %>% 
  group_by(.draw, condition) %>% 
  summarise(.epred = mean(.epred))
model.4.posteriors %>%
  ungroup() %>%
  compare_levels(.epred, by = condition) %>%
  median_qi(.width = .95) %>%
  ggplot() + 
  geom_pointinterval(aes(x = .epred, y = condition, xmin = .lower, xmax = .upper)) +
  geom_vline(xintercept = 0, linetype = 2, color = "#979797") +
  scale_x_continuous(breaks = seq(-10, 10, by = 2)) +
  coord_cartesian(xlim = c(-10, 10)) +
  xlab("Difference in Mean") +
  theme(
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16),
    axis.title.y = element_blank()
  )

Plot out the posteriors for mean.

model.4.posteriors %>% 
 ggplot(aes(x = .epred, fill = condition)) +
    stat_halfeye(.width = .95) +
    scale_color_theme() +
    facet_wrap(condition ~ ., ncol = 1) + 
    theme_density_x + 
    scale_x_continuous(limits = c(30, 50), breaks = seq(0, 60, by = 10)) +
    ggtitle(paste0('Posterior means of ', deparse(substitute(model))))

From this, we can see that there does not appear to be a difference between the two conditions.

Reporting

Let’s use model 4

model

We will need the ESSs and RHat from this print. Also, if you want to report the standard deviation of random intercepts or CIs for other paramaters. You can find them via summary(..)

summary(model.4)
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: pumps ~ 1 + condition 
##    Data: pose_raw_df (Number of observations: 2400) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Population-Level Effects: 
##                    Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept              3.64      0.01     3.61     3.66 1.00    11222     8821
## conditionexpansive    -0.01      0.02    -0.05     0.03 1.00    11532     9305
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape     4.59      0.15     4.31     4.89 1.00    11929     8873
## 
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Credible Intervals

We compute the credible intervals (CIs; Bayesian analogy to confidence intervals) for the two conditions.

model.4.posteriors.CI <- 
model.4.posteriors %>% 
  group_by(condition) %>% 
  median_qi(.epred, width = .95)
model.4.posteriors.CI
condition .epred .epred.lower .epred.upper width width.lower width.upper .width .point .interval
constrictive 37.93712 36.88259 39.01633 0.95 0.95 0.95 0.95 median qi
expansive 37.65855 36.61154 38.72292 0.95 0.95 0.95 0.95 median qi
model.4.posteriors %>% 
ggplot() +
  geom_density(aes(x = .epred, fill = condition), alpha = .5, color = NA) +
  geom_point(model.4.posteriors.CI, 
             mapping = aes(x = .epred, y = 0), size = 3) + 
  geom_errorbarh(model.4.posteriors.CI, 
                 mapping = aes(x = .epred, xmin = .epred.lower, xmax = .epred.upper, y = 0), height = 0, linewidth = 1.5) + 
  facet_wrap(condition ~ ., ncol = 1) + 
  scale_x_continuous(limits = c(0, 80)) + 
  scale_y_continuous(expand = c(.02,.02)) + 
  scale_color_theme() +
  theme_density_x 

subtraction

model.4.posteriors_diff <- 
model.4.posteriors %>% 
  ungroup() %>% 
  compare_levels(variable = .epred, by = condition) %>% 
  ungroup()
head(model.4.posteriors_diff)
.draw condition .epred
1 expansive - constrictive -0.1462938
2 expansive - constrictive -0.5253759
3 expansive - constrictive -0.8757953
4 expansive - constrictive -1.5400207
5 expansive - constrictive -0.3486310
6 expansive - constrictive -0.0051823
model.4.posteriors_diff.CI <-
model.4.posteriors_diff %>% 
  median_qi(.epred)

model.4.posteriors_diff.CI
.epred .lower .upper .width .point .interval
-0.2807261 -1.750643 1.235321 0.95 median qi
model.4.posteriors_diff %>% 
ggplot() +
  geom_density(aes(x = .epred), alpha = .5,  fill = 'skyblue', color = NA, adjust = 2) +
  geom_point(model.4.posteriors_diff.CI, 
             mapping = aes(x = .epred, y = 0), size = 3) + 
  geom_errorbarh(model.4.posteriors_diff.CI, 
                 mapping = aes(x = .epred, xmin = .lower, xmax = .upper, y = 0), height = 0, linewidth = 1.5) + 
  #scale_x_continuous(limits = c(-50, 50)) + 
  xlab('expansive - constrictive') +
  ggtitle('Mean difference in expansive and constrictive') + 
  geom_vline(xintercept = 0, linetype = 2) + 
  scale_y_continuous(expand = c(.02,.02)) + 
  scale_x_continuous(limits = c(-10, 10), breaks = seq(-10, 10, by = 5)) + 
  scale_color_theme() +
  theme_density_x 

Model 5 (Bonus): Negative-Binomial Regression model with other predictors

Step 1: model specification

\[ \begin{align} y_{i} &\sim \mathrm{Neg-Binomial}(\mu, \phi) \\ log(\mu) &= \beta_{0,k} + \beta_{1} . x_i + \beta_j . x_j \\ \beta_{0, j} &\sim \mathrm{Normal}(\alpha, \sigma) \\ \alpha &\sim \mathrm{Normal}(4.1, 0.5) \\ \sigma &\sim \mathrm{Exponential}(2) \\ \beta_{0, j} &\sim \mathrm{Student\_t}(3, 0, 1) \\ \beta_{1} &\sim \mathrm{Normal}(0, 0.5) \\ \phi &\sim \mathrm{Gamma}(5, 1) \\ i & \in \{\mathrm{expansive}, \mathrm{constrictive}\}\\ j & \in \{1, ..., \mathrm{N}\} \end{align} \]

Step 2: model fitting

model.5 = brm(pumps ~ 1 + condition + trial + (1|participant),
                      data = pose_raw_df, family = negbinomial(),
                      prior = c(prior(normal(4.1, 0.5), class = Intercept),
                                prior(normal(0, 0.5), class = b),
                                prior(exponential(2), class = sd),
                                prior(gamma(5, 1), class = shape)
                      ),
                      backend = BRM_BACKEND,
                      file = 'rds/model.4.rds',
                      file_refit = 'on_change' ,
                      iter = 4000, warmup = 1000, cores = 4, chains = 4)
## Running MCMC with 4 parallel chains...
## 
## Chain 1 Iteration:    1 / 4000 [  0%]  (Warmup) 
## Chain 2 Iteration:    1 / 4000 [  0%]  (Warmup) 
## Chain 3 Iteration:    1 / 4000 [  0%]  (Warmup) 
## Chain 4 Iteration:    1 / 4000 [  0%]  (Warmup) 
## Chain 2 Iteration:  100 / 4000 [  2%]  (Warmup) 
## Chain 3 Iteration:  100 / 4000 [  2%]  (Warmup) 
## Chain 1 Iteration:  100 / 4000 [  2%]  (Warmup) 
## Chain 4 Iteration:  100 / 4000 [  2%]  (Warmup) 
## Chain 2 Iteration:  200 / 4000 [  5%]  (Warmup) 
## Chain 3 Iteration:  200 / 4000 [  5%]  (Warmup) 
## Chain 4 Iteration:  200 / 4000 [  5%]  (Warmup) 
## Chain 1 Iteration:  200 / 4000 [  5%]  (Warmup) 
## Chain 2 Iteration:  300 / 4000 [  7%]  (Warmup) 
## Chain 4 Iteration:  300 / 4000 [  7%]  (Warmup) 
## Chain 3 Iteration:  300 / 4000 [  7%]  (Warmup) 
## Chain 1 Iteration:  300 / 4000 [  7%]  (Warmup) 
## Chain 2 Iteration:  400 / 4000 [ 10%]  (Warmup) 
## Chain 3 Iteration:  400 / 4000 [ 10%]  (Warmup) 
## Chain 4 Iteration:  400 / 4000 [ 10%]  (Warmup) 
## Chain 2 Iteration:  500 / 4000 [ 12%]  (Warmup) 
## Chain 1 Iteration:  400 / 4000 [ 10%]  (Warmup) 
## Chain 2 Iteration:  600 / 4000 [ 15%]  (Warmup) 
## Chain 3 Iteration:  500 / 4000 [ 12%]  (Warmup) 
## Chain 4 Iteration:  500 / 4000 [ 12%]  (Warmup) 
## Chain 1 Iteration:  500 / 4000 [ 12%]  (Warmup) 
## Chain 2 Iteration:  700 / 4000 [ 17%]  (Warmup) 
## Chain 3 Iteration:  600 / 4000 [ 15%]  (Warmup) 
## Chain 4 Iteration:  600 / 4000 [ 15%]  (Warmup) 
## Chain 1 Iteration:  600 / 4000 [ 15%]  (Warmup) 
## Chain 2 Iteration:  800 / 4000 [ 20%]  (Warmup) 
## Chain 3 Iteration:  700 / 4000 [ 17%]  (Warmup) 
## Chain 4 Iteration:  700 / 4000 [ 17%]  (Warmup) 
## Chain 1 Iteration:  700 / 4000 [ 17%]  (Warmup) 
## Chain 2 Iteration:  900 / 4000 [ 22%]  (Warmup) 
## Chain 3 Iteration:  800 / 4000 [ 20%]  (Warmup) 
## Chain 4 Iteration:  800 / 4000 [ 20%]  (Warmup) 
## Chain 1 Iteration:  800 / 4000 [ 20%]  (Warmup) 
## Chain 3 Iteration:  900 / 4000 [ 22%]  (Warmup) 
## Chain 4 Iteration:  900 / 4000 [ 22%]  (Warmup) 
## Chain 2 Iteration: 1000 / 4000 [ 25%]  (Warmup) 
## Chain 2 Iteration: 1001 / 4000 [ 25%]  (Sampling) 
## Chain 1 Iteration:  900 / 4000 [ 22%]  (Warmup) 
## Chain 2 Iteration: 1100 / 4000 [ 27%]  (Sampling) 
## Chain 3 Iteration: 1000 / 4000 [ 25%]  (Warmup) 
## Chain 3 Iteration: 1001 / 4000 [ 25%]  (Sampling) 
## Chain 4 Iteration: 1000 / 4000 [ 25%]  (Warmup) 
## Chain 4 Iteration: 1001 / 4000 [ 25%]  (Sampling) 
## Chain 1 Iteration: 1000 / 4000 [ 25%]  (Warmup) 
## Chain 1 Iteration: 1001 / 4000 [ 25%]  (Sampling) 
## Chain 2 Iteration: 1200 / 4000 [ 30%]  (Sampling) 
## Chain 3 Iteration: 1100 / 4000 [ 27%]  (Sampling) 
## Chain 4 Iteration: 1100 / 4000 [ 27%]  (Sampling) 
## Chain 1 Iteration: 1100 / 4000 [ 27%]  (Sampling) 
## Chain 2 Iteration: 1300 / 4000 [ 32%]  (Sampling) 
## Chain 3 Iteration: 1200 / 4000 [ 30%]  (Sampling) 
## Chain 4 Iteration: 1200 / 4000 [ 30%]  (Sampling) 
## Chain 1 Iteration: 1200 / 4000 [ 30%]  (Sampling) 
## Chain 2 Iteration: 1400 / 4000 [ 35%]  (Sampling) 
## Chain 3 Iteration: 1300 / 4000 [ 32%]  (Sampling) 
## Chain 4 Iteration: 1300 / 4000 [ 32%]  (Sampling) 
## Chain 1 Iteration: 1300 / 4000 [ 32%]  (Sampling) 
## Chain 2 Iteration: 1500 / 4000 [ 37%]  (Sampling) 
## Chain 3 Iteration: 1400 / 4000 [ 35%]  (Sampling) 
## Chain 4 Iteration: 1400 / 4000 [ 35%]  (Sampling) 
## Chain 1 Iteration: 1400 / 4000 [ 35%]  (Sampling) 
## Chain 2 Iteration: 1600 / 4000 [ 40%]  (Sampling) 
## Chain 3 Iteration: 1500 / 4000 [ 37%]  (Sampling) 
## Chain 4 Iteration: 1500 / 4000 [ 37%]  (Sampling) 
## Chain 2 Iteration: 1700 / 4000 [ 42%]  (Sampling) 
## Chain 4 Iteration: 1600 / 4000 [ 40%]  (Sampling) 
## Chain 1 Iteration: 1500 / 4000 [ 37%]  (Sampling) 
## Chain 3 Iteration: 1600 / 4000 [ 40%]  (Sampling) 
## Chain 2 Iteration: 1800 / 4000 [ 45%]  (Sampling) 
## Chain 1 Iteration: 1600 / 4000 [ 40%]  (Sampling) 
## Chain 3 Iteration: 1700 / 4000 [ 42%]  (Sampling) 
## Chain 4 Iteration: 1700 / 4000 [ 42%]  (Sampling) 
## Chain 1 Iteration: 1700 / 4000 [ 42%]  (Sampling) 
## Chain 2 Iteration: 1900 / 4000 [ 47%]  (Sampling) 
## Chain 3 Iteration: 1800 / 4000 [ 45%]  (Sampling) 
## Chain 4 Iteration: 1800 / 4000 [ 45%]  (Sampling) 
## Chain 2 Iteration: 2000 / 4000 [ 50%]  (Sampling) 
## Chain 4 Iteration: 1900 / 4000 [ 47%]  (Sampling) 
## Chain 1 Iteration: 1800 / 4000 [ 45%]  (Sampling) 
## Chain 3 Iteration: 1900 / 4000 [ 47%]  (Sampling) 
## Chain 2 Iteration: 2100 / 4000 [ 52%]  (Sampling) 
## Chain 4 Iteration: 2000 / 4000 [ 50%]  (Sampling) 
## Chain 1 Iteration: 1900 / 4000 [ 47%]  (Sampling) 
## Chain 3 Iteration: 2000 / 4000 [ 50%]  (Sampling) 
## Chain 2 Iteration: 2200 / 4000 [ 55%]  (Sampling) 
## Chain 4 Iteration: 2100 / 4000 [ 52%]  (Sampling) 
## Chain 3 Iteration: 2100 / 4000 [ 52%]  (Sampling) 
## Chain 1 Iteration: 2000 / 4000 [ 50%]  (Sampling) 
## Chain 2 Iteration: 2300 / 4000 [ 57%]  (Sampling) 
## Chain 4 Iteration: 2200 / 4000 [ 55%]  (Sampling) 
## Chain 3 Iteration: 2200 / 4000 [ 55%]  (Sampling) 
## Chain 1 Iteration: 2100 / 4000 [ 52%]  (Sampling) 
## Chain 2 Iteration: 2400 / 4000 [ 60%]  (Sampling) 
## Chain 4 Iteration: 2300 / 4000 [ 57%]  (Sampling) 
## Chain 3 Iteration: 2300 / 4000 [ 57%]  (Sampling) 
## Chain 1 Iteration: 2200 / 4000 [ 55%]  (Sampling) 
## Chain 2 Iteration: 2500 / 4000 [ 62%]  (Sampling) 
## Chain 4 Iteration: 2400 / 4000 [ 60%]  (Sampling) 
## Chain 3 Iteration: 2400 / 4000 [ 60%]  (Sampling) 
## Chain 2 Iteration: 2600 / 4000 [ 65%]  (Sampling) 
## Chain 4 Iteration: 2500 / 4000 [ 62%]  (Sampling) 
## Chain 1 Iteration: 2300 / 4000 [ 57%]  (Sampling) 
## Chain 3 Iteration: 2500 / 4000 [ 62%]  (Sampling) 
## Chain 1 Iteration: 2400 / 4000 [ 60%]  (Sampling) 
## Chain 2 Iteration: 2700 / 4000 [ 67%]  (Sampling) 
## Chain 4 Iteration: 2600 / 4000 [ 65%]  (Sampling) 
## Chain 3 Iteration: 2600 / 4000 [ 65%]  (Sampling) 
## Chain 2 Iteration: 2800 / 4000 [ 70%]  (Sampling) 
## Chain 4 Iteration: 2700 / 4000 [ 67%]  (Sampling) 
## Chain 1 Iteration: 2500 / 4000 [ 62%]  (Sampling) 
## Chain 3 Iteration: 2700 / 4000 [ 67%]  (Sampling) 
## Chain 2 Iteration: 2900 / 4000 [ 72%]  (Sampling) 
## Chain 4 Iteration: 2800 / 4000 [ 70%]  (Sampling) 
## Chain 1 Iteration: 2600 / 4000 [ 65%]  (Sampling) 
## Chain 3 Iteration: 2800 / 4000 [ 70%]  (Sampling) 
## Chain 2 Iteration: 3000 / 4000 [ 75%]  (Sampling) 
## Chain 4 Iteration: 2900 / 4000 [ 72%]  (Sampling) 
## Chain 1 Iteration: 2700 / 4000 [ 67%]  (Sampling) 
## Chain 2 Iteration: 3100 / 4000 [ 77%]  (Sampling) 
## Chain 3 Iteration: 2900 / 4000 [ 72%]  (Sampling) 
## Chain 4 Iteration: 3000 / 4000 [ 75%]  (Sampling) 
## Chain 1 Iteration: 2800 / 4000 [ 70%]  (Sampling) 
## Chain 3 Iteration: 3000 / 4000 [ 75%]  (Sampling) 
## Chain 2 Iteration: 3200 / 4000 [ 80%]  (Sampling) 
## Chain 4 Iteration: 3100 / 4000 [ 77%]  (Sampling) 
## Chain 1 Iteration: 2900 / 4000 [ 72%]  (Sampling) 
## Chain 3 Iteration: 3100 / 4000 [ 77%]  (Sampling) 
## Chain 2 Iteration: 3300 / 4000 [ 82%]  (Sampling) 
## Chain 4 Iteration: 3200 / 4000 [ 80%]  (Sampling) 
## Chain 1 Iteration: 3000 / 4000 [ 75%]  (Sampling) 
## Chain 3 Iteration: 3200 / 4000 [ 80%]  (Sampling) 
## Chain 2 Iteration: 3400 / 4000 [ 85%]  (Sampling) 
## Chain 4 Iteration: 3300 / 4000 [ 82%]  (Sampling) 
## Chain 1 Iteration: 3100 / 4000 [ 77%]  (Sampling) 
## Chain 2 Iteration: 3500 / 4000 [ 87%]  (Sampling) 
## Chain 3 Iteration: 3300 / 4000 [ 82%]  (Sampling) 
## Chain 4 Iteration: 3400 / 4000 [ 85%]  (Sampling) 
## Chain 1 Iteration: 3200 / 4000 [ 80%]  (Sampling) 
## Chain 3 Iteration: 3400 / 4000 [ 85%]  (Sampling) 
## Chain 2 Iteration: 3600 / 4000 [ 90%]  (Sampling) 
## Chain 4 Iteration: 3500 / 4000 [ 87%]  (Sampling) 
## Chain 3 Iteration: 3500 / 4000 [ 87%]  (Sampling) 
## Chain 1 Iteration: 3300 / 4000 [ 82%]  (Sampling) 
## Chain 2 Iteration: 3700 / 4000 [ 92%]  (Sampling) 
## Chain 4 Iteration: 3600 / 4000 [ 90%]  (Sampling) 
## Chain 1 Iteration: 3400 / 4000 [ 85%]  (Sampling) 
## Chain 2 Iteration: 3800 / 4000 [ 95%]  (Sampling) 
## Chain 3 Iteration: 3600 / 4000 [ 90%]  (Sampling) 
## Chain 4 Iteration: 3700 / 4000 [ 92%]  (Sampling) 
## Chain 1 Iteration: 3500 / 4000 [ 87%]  (Sampling) 
## Chain 2 Iteration: 3900 / 4000 [ 97%]  (Sampling) 
## Chain 3 Iteration: 3700 / 4000 [ 92%]  (Sampling) 
## Chain 4 Iteration: 3800 / 4000 [ 95%]  (Sampling) 
## Chain 2 Iteration: 4000 / 4000 [100%]  (Sampling) 
## Chain 3 Iteration: 3800 / 4000 [ 95%]  (Sampling) 
## Chain 2 finished in 18.7 seconds.
## Chain 1 Iteration: 3600 / 4000 [ 90%]  (Sampling) 
## Chain 4 Iteration: 3900 / 4000 [ 97%]  (Sampling) 
## Chain 1 Iteration: 3700 / 4000 [ 92%]  (Sampling) 
## Chain 3 Iteration: 3900 / 4000 [ 97%]  (Sampling) 
## Chain 4 Iteration: 4000 / 4000 [100%]  (Sampling) 
## Chain 4 finished in 18.9 seconds.
## Chain 1 Iteration: 3800 / 4000 [ 95%]  (Sampling) 
## Chain 3 Iteration: 4000 / 4000 [100%]  (Sampling) 
## Chain 3 finished in 19.1 seconds.
## Chain 1 Iteration: 3900 / 4000 [ 97%]  (Sampling) 
## Chain 1 Iteration: 4000 / 4000 [100%]  (Sampling) 
## Chain 1 finished in 19.7 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 19.1 seconds.
## Total execution time: 19.8 seconds.
summary(model.5)
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: pumps ~ 1 + condition + trial + (1 | participant) 
##    Data: pose_raw_df (Number of observations: 2400) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Group-Level Effects: 
## ~participant (Number of levels: 80) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.31      0.03     0.27     0.37 1.01     1324     2915
## 
## Population-Level Effects: 
##                    Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept              3.52      0.05     3.42     3.62 1.00      786     1868
## conditionexpansive    -0.02      0.07    -0.16     0.12 1.00      872     1592
## trial                  0.01      0.00     0.00     0.01 1.00    12288     8159
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape     7.42      0.26     6.92     7.94 1.00    11903     8771
## 
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pose_raw_df.bayesian_poisson.y <- pose_raw_df$pumps

pose_raw_df.bayesian_poisson.yrep <- posterior_predict(model.5, ndraws = 30, seed = 1234)

ppc_dens_overlay(y = pose_raw_df.bayesian_poisson.y,
                 yrep = pose_raw_df.bayesian_poisson.yrep)

summary(model.5)
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: pumps ~ 1 + condition + trial + (1 | participant) 
##    Data: pose_raw_df (Number of observations: 2400) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Group-Level Effects: 
## ~participant (Number of levels: 80) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.31      0.03     0.27     0.37 1.01     1324     2915
## 
## Population-Level Effects: 
##                    Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept              3.52      0.05     3.42     3.62 1.00      786     1868
## conditionexpansive    -0.02      0.07    -0.16     0.12 1.00      872     1592
## trial                  0.01      0.00     0.00     0.01 1.00    12288     8159
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape     7.42      0.26     6.92     7.94 1.00    11903     8771
## 
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Step 3: model interpretation

The results of this model are on the log-odds scale. What do the coefficients mean? The simplest way is to simply transform the data into a more interpretable scale and visualise the results:

p.model.5 = pose_raw_df %>%
  ggplot() +
  geom_point(aes(x = pumps, y = condition, colour = condition), 
             position = position_jitter(height = 0.1), alpha = 0.7) +
  scale_color_theme() + 
  labs(y = "Condition") +
  theme(
    legend.position = "none", 
    axis.line.y = element_blank(), 
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank()
  ) +
  scale_x_continuous(limits = c(0, 130), breaks = seq(0, 150, by = 30))

draws.model.5 = plot_predictions(model.5, df = crossing(condition = c('expansive', 'constrictive'), 
                                          trial = 0:29,
                                          participant = 0)) + 
    scale_x_continuous(breaks = seq(0, 150, by = 30)) +
    coord_cartesian(xlim = c(0, 150))

wrap_plots(
  draws.model.5, 
  p.model.5,
nrow = 2)

We generate the average of 30 trials using an average participant.

model.5.posteriors <-  model.5 %>% 
    epred_draws(crossing(condition = c('expansive', 'constrictive'),     
                       trial = 0:29),
                re_formula = NA,
                allow_new_levels  = FALSE) %>% 
  mutate(model = 'model 5')  %>% 
  group_by(.draw, condition) %>% 
  summarise(.epred = mean(.epred))

model.5.posteriors %>%
  ungroup() %>%
  compare_levels(.epred, by = condition) %>%
  median_qi(.width = .95) %>%
  ggplot() + 
  geom_pointinterval(aes(x = .epred, y = condition, xmin = .lower, xmax = .upper)) +
  geom_vline(xintercept = 0, linetype = 2, color = "#979797") +
  scale_x_continuous(breaks = seq(-10, 10, by = 2)) +
  coord_cartesian(xlim = c(-10, 10)) +
  xlab("Difference in Mean") +
  theme(
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16),
    axis.title.y = element_blank()
  )

Plot out the posteriors for mean.

model.5.posteriors %>% 
 ggplot(aes(x = .epred, fill = condition)) +
    stat_halfeye(.width = .95) +
    scale_color_theme() +
    facet_wrap(condition ~ ., ncol = 1) + 
    theme_density_x + 
    scale_x_continuous(limits = c(30, 50), breaks = seq(0, 60, by = 10)) +
    ggtitle(paste0('Posterior means of ', deparse(substitute(model))))

From this, we can see that there does not appear to be a difference between the two conditions.

Session info

sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] magick_2.7.3        gganimate_1.0.8     cmdstanr_0.5.2     
##  [4] knitr_1.39          ggplot2_3.4.0       bayesplot_1.9.0    
##  [7] ggdist_3.1.1        tidybayes_3.0.2     brms_2.17.0        
## [10] Rcpp_1.0.8.3        modelr_0.1.8        broom.mixed_0.2.9.4
## [13] broom_1.0.0         patchwork_1.1.2     gtools_3.9.2.1     
## [16] forcats_0.5.1       tidyr_1.2.0         purrr_0.3.4        
## [19] tibble_3.1.7        dplyr_1.0.9        
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-3     ellipsis_0.3.2       ggridges_0.5.3      
##   [4] markdown_1.1         base64enc_0.1-3      rstudioapi_0.13     
##   [7] listenv_0.8.0        furrr_0.3.1          farver_2.1.1        
##  [10] rstan_2.21.5         bit64_4.0.5          svUnit_1.0.6        
##  [13] DT_0.23              fansi_1.0.3          mvtnorm_1.1-3       
##  [16] diffobj_0.3.5        bridgesampling_1.1-2 codetools_0.2-18    
##  [19] splines_4.2.2        shinythemes_1.2.0    jsonlite_1.8.0      
##  [22] shiny_1.7.1          readr_2.1.2          compiler_4.2.2      
##  [25] backports_1.4.1      assertthat_0.2.1     Matrix_1.5-1        
##  [28] fastmap_1.1.0        cli_3.4.1            tweenr_1.0.2        
##  [31] later_1.3.0          htmltools_0.5.2      prettyunits_1.1.1   
##  [34] tools_4.2.2          igraph_1.3.1         coda_0.19-4         
##  [37] gtable_0.3.0         glue_1.6.2           reshape2_1.4.4      
##  [40] posterior_1.2.1      jquerylib_0.1.4      vctrs_0.5.1         
##  [43] nlme_3.1-160         crosstalk_1.2.0      tensorA_0.36.2      
##  [46] xfun_0.31            stringr_1.4.0        globals_0.15.0      
##  [49] ps_1.7.0             mime_0.12            miniUI_0.1.1.1      
##  [52] lifecycle_1.0.3      future_1.26.1        zoo_1.8-10          
##  [55] scales_1.2.0         vroom_1.5.7          colourpicker_1.1.1  
##  [58] hms_1.1.1            promises_1.2.0.1     Brobdingnag_1.2-7   
##  [61] parallel_4.2.2       inline_0.3.19        shinystan_2.6.0     
##  [64] yaml_2.3.5           gridExtra_2.3        loo_2.5.1           
##  [67] StanHeaders_2.21.0-7 sass_0.4.1           stringi_1.7.6       
##  [70] highr_0.9            dygraphs_1.1.1.6     gifski_1.6.6-1      
##  [73] checkmate_2.1.0      pkgbuild_1.3.1       rlang_1.0.6         
##  [76] pkgconfig_2.0.3      matrixStats_0.62.0   distributional_0.3.0
##  [79] evaluate_0.15        lattice_0.20-45      labeling_0.4.2      
##  [82] rstantools_2.2.0     htmlwidgets_1.5.4    cowplot_1.1.1       
##  [85] bit_4.0.4            tidyselect_1.1.2     processx_3.5.3      
##  [88] parallelly_1.31.1    plyr_1.8.7           magrittr_2.0.3      
##  [91] R6_2.5.1             generics_0.1.3       DBI_1.1.2           
##  [94] pillar_1.7.0         withr_2.5.0          xts_0.12.1          
##  [97] abind_1.4-5          crayon_1.5.1         arrayhelpers_1.1-0  
## [100] utf8_1.2.2           tzdb_0.3.0           rmarkdown_2.14      
## [103] progress_1.2.2       grid_4.2.2           data.table_1.14.2   
## [106] callr_3.7.0          threejs_0.3.3        digest_0.6.29       
## [109] xtable_1.8-4         httpuv_1.6.5         RcppParallel_5.1.5  
## [112] stats4_4.2.2         munsell_0.5.0        bslib_0.3.1         
## [115] shinyjs_2.1.0